% perm filename XP.TEX[TEX,DEK] blob
% sn#841372 filedate 1987-06-14 generic text, type T, neo UTF8
% exercises for TeX: The Program
% Page dimensions and the extra fonts loaded for this paper.
\hsize=6.5in \vsize=8.75in % TUGboat (cleared by BB, Jan 87)
\font\tenbxsl=cmbxsl10 % font for slanted copy in title
% \eightrm supplies the small roman type used by \note and \X further down
\font\eightrm=cmr8
% NOTE(review): \logo is not referenced in the visible part of this file
\font\logo=logo10
% the following macros were used to make all the class handouts
% Each macro below takes one argument and sets it in an unbreakable \hbox.
\def\\#1{\hbox{\it#1\/\kern.05em}} % italic type for identifiers
\font\tentex=cmtex10 % TeX extended character set (used in strings)
% a \hyphenchar of -1 disables hyphenation in both typewriter-style fonts
\hyphenchar\tentt=-1 \hyphenchar\tentex=-1
% \.{...} sets a string in \tentex; inside it \' and \` give the quote marks
\def\.#1{\hbox{\tentex % typewriter type for strings
\let\'=\RQ % right quote in a string
\let\`=\LQ % left quote in a string
#1}}
\def\LQ{{\tt\char'22}} % left quote in a string
\def\RQ{{\tt\char'23}} % right quote in a string
% \O{n}: roman character '23, a small negative kern, then the digits in italic
\def\O#1{\hbox{\rm\char'23\kern-.2em\it#1\/\kern.05em}} % octal constant
% \begintt ... \endtt: displayed verbatim text.  \begintt opens a display,
% switches to verbatim catcodes, makes | the escape character (catcode 0),
% and hands control to \ttfinish, which reads everything up to \endtt.
\def\begintt{$$\ttverbatim \catcode`\|=0 \parskip=0pt \ttfinish}
% catcode 12 ("other") is given to every character that must print as itself
\chardef\other=12
% \ttverbatim opens a group (closed by the caller), neutralizes the special
% characters, makes spaces and line ends significant, and selects \tt.
\def\ttverbatim{\begingroup
\catcode`\\=\other \catcode`\{=\other \catcode`\}=\other
\catcode`\$=\other \catcode`\&=\other \catcode`\#=\other
\catcode`\%=\other \catcode`\~=\other \catcode`\_=\other
\catcode`\↑=\other
\obeyspaces \obeylines \tt}
% under \obeyspaces the space is active; define the active space as "\ "
{\obeyspaces\gdef {\ }}
% \ttfinish is defined while | is the escape and \ is an ordinary character,
% so that the delimiter \endtt is matched as literal characters.
% #1 is the rest of the \begintt line, #2 the lines up to \endtt.
{\catcode`\|=0 |catcode`|\=\other % | is temporary escape character
|obeylines % end of line is active
|gdef|ttfinish#1↑↑M#2\endtt{#1|vbox{#2}|endgroup$$}}
% \| typesets a vertical bar in typewriter type
\def\|{{\tt\char`\|}}
% in running text |...| is inline verbatim: the first | starts \ttverbatim
% and redefines | so that the second one closes the group
\catcode`\|=\active \def|{\ttverbatim\let|=\endgroup}
% \prob n. begins answer number n: a \medbreak, then the number and a period
% in boldface, left-justified in a box 20pt wide.  The argument is delimited
% by a period followed by a space.
\def\prob#1. {\medbreak\noindent\hbox to20pt{\bf #1.\hfil}}
\pageno=10
% macros copied from WEBMAC will be used for the rest of this paper!
% \sec keeps the current meaning of \S, because \S is redefined as \equiv below
\let\sec=\S
% \! keeps the vertical-bar macro \|, which is redefined below; the string
% macro \. restores it with \let\|=\!
\let\!=\|
% The definitions that follow are local to this group.
% NOTE(review): the matching \endgroup is not in the visible part of the file.
\begingroup
% @ and the double-quote character become ordinary (catcode 12) characters
\catcode`\@=\other
\catcode`\"=\other
\parskip 0pt % no stretch between paragraphs
% Font-selection macros in the style of WEBMAC.  Each takes one argument and
% sets it in an unbreakable \hbox:
%   \\{name}  multi-letter identifier, in italic with italic correction
%   \|x       one-letter identifier, set in math mode
%   \&{word}  reserved word, in boldface
% \& must take a parameter: \defin and the Pascal fragments call it as \&{...}.
\def\\#1{\hbox{\it#1\/\kern.05em}} % italic type for identifiers
\def\|#1{\hbox{$#1$}} % one-letter identifiers look a bit better this way
\def\&#1{\hbox{\bf#1\/}} % boldface type for reserved words
% \.{...} sets a string in the \tentex font inside an \hbox.  Within the box
% the control symbols below are locally re-pointed at the character macros
% (\BS, \RQ, \LQ, ...) so that special characters can be written as \\, \', etc.
\def\.#1{\hbox{\tentex % typewriter type for strings
\let\\=\BS % backslash in a string
\let\'=\RQ % right quote in a string
\let\`=\LQ % left quote in a string
\let\{=\LB % left brace in a string
\let\}=\RB % right brace in a string
\let\~=\TL % tilde in a string
\let\ =\SP % space in a string
\let\_=\UL % underline in a string
\let\&=\AM % ampersand in a string
\let\|=\!% vertical line in a string
#1}}
% Control symbols that print a normally-special character in typewriter type.
\def\#{\hbox{\tt\char`\#}} % parameter sign
\def\${\hbox{\tt\char`\$}} % dollar sign
\def\%{\hbox{\tt\char`\%}} % percent sign
% in math mode this gives \mathchar"222, otherwise the character itself
\def\↑{\ifmmode\mathchar"222 \else\char`↑ \fi} % pointer or hat
% circumflex accents can be obtained from \↑↑D instead of \↑
% Named characters used by the string macro \. (see its \let assignments).
\chardef\AM=`\& % ampersand character in a string
\chardef\BS=`\\ % backslash in a string
\chardef\LB=`\{ % left brace in a string
\def\LQ{{\tt\char'22}} % left quote in a string
\chardef\RB=`\} % right brace in a string
\def\RQ{{\tt\char'23}} % right quote in a string
\def\SP{{\tt\char`\ }} % (visible) space in a string
\chardef\TL=`\~ % tilde in a string
\chardef\UL=`\_ % underline character in a string
% Layout controls for Pascal text.  \bak and \bakk are empty boxes of negative
% width; copying one moves the current position to the left.
\newbox\bak \setbox\bak=\hbox to -1em{} % backspace one em
\newbox\bakk\setbox\bakk=\hbox to -2em{} % backspace two ems
\newcount\ind % current indentation in ems
% \1 and \2 change \ind globally; \1 also resets \hangindent to the new value
\def\1{\global\advance\ind by1\hangindent\ind em} % indent one more notch
\def\2{\global\advance\ind by-1} % indent one less notch
% the penalty is the argument digit followed by 0, e.g. \3{1} gives \penalty10
\def\3#1{\hfil\penalty#10\hfilneg} % optional break within a statement
\def\4{\copy\bak} % backspace one notch
\def\5{\hfil\penalty-1\hfilneg\kern2.5em\copy\bakk\ignorespaces}% optional break
% \6 does nothing in math mode; otherwise it ends the paragraph and starts a
% new unindented one, hanging and kerned by \ind ems, then backs up two ems
\def\6{\ifmmode\else\par % forced break
\hangindent\ind em\noindent\kern\ind em\copy\bakk\ignorespaces\fi}
\def\7{\Y\6} % forced break and a little extra space
% Single-letter control sequences for typeset Pascal text, plus the helpers
% they rely on.
% \yskip is the vertical space inserted by \Y
\let\yskip=\smallskip
\def\to{\mathrel{.\,.}} % double dot, used only in math mode
% \note{heading}text. sets a small-type (\eightrm) note, hanging by 2em;
% the second argument is delimited by a period
\def\note#1#2.{\Y\noindent{\hangindent2em\baselineskip10pt\eightrm#1 #2.\par}}
% \startsection leaves Pascal mode (\Q) and prints the number \modno in bold
\def\startsection{\Q\noindent\strut{\bf\modno.\quad}}
\def\defin#1{\global\advance\ind by 2 \1\&{#1 }} % begin `define' or `format'
\def\A{\note{See also}} % cross-reference for multiply defined section names
\def\B{\mathopen{\.{@\{}}} % begin controlled comment
% \XX is set globally to leave and re-enter math mode when \C is used inside
% a formula, and to nothing otherwise
\def\C#1{\ifmmode\gdef\XX{\null$\null}\else\gdef\XX{}\fi % Pascal comments
\XX\hfil\penalty-1\hfilneg\quad$\{\,$#1$\,\}$\XX}
\def\D{\defin{define}} % macro definition
\def\E{\cdot10↑} % exponent in floating point constant
\def\F{\defin{format}} % format definition
\let\G=\ge % greater than or equal sign
\def\H#1{\hbox{\rm\char"7D\tt#1}} % hexadecimal constant
\let\I=\ne % unequal sign
\def\J{\.{@\&}} % TANGLE's join operation
\let\K=\gets % left arrow
\let\L=\le % less than or equal sign
% \M n. records n in \modno and calls \startsection; it ends with an \iftrue
% NOTE(review): the matching \fi is not in the visible part of the file
\outer\def\M#1.{\bigbreak\def\modno{#1}\startsection\ignorespaces\iftrue}
\def\O#1{\hbox{\rm\char'23\kern-.2em\it#1\/\kern.05em}} % octal constant
% \P switches the paragraph parameters to "Pascal mode": ragged right margin,
% 20pt left margin, extra space after semicolons, and hyphenation penalties of
% 10000.  It also resets \ind to 2 globally and applies \1.
\def\P{\rightskip=0pt plus 100pt minus 10pt % go into Pascal mode
\leftskip=20pt
\parindent 1em % for paragraphs and for the first line of Pascal text
\sfcode`;=3000
\pretolerance 10000
\hyphenpenalty 10000 \exhyphenpenalty 10000
\global\ind=2 \1\ \unskip}
% \Q sets the same parameters to the values used for ordinary text
\def\Q{\rightskip=0pt % get out of Pascal mode
\parindent20pt \leftskip=0pt
\sfcode`;=1500 \pretolerance 200 \hyphenpenalty 50 \exhyphenpenalty 50 }
\let\R=\lnot % logical not
\let\S=\equiv % equivalence sign
\def\T{\mathclose{\.{@\}}}} % terminate controlled comment
\def\U{\note{This code is used in}} % cross-reference for uses of sections
\let\V=\lor % logical or
\let\W=\land % logical and
% \X n:name\X sets a section name in angle brackets, followed by the section
% number n in \eightrm; \XX handles the case where \X occurs in math mode
\def\X#1:#2\X{\ifmmode\gdef\XX{\null$\null}\else\gdef\XX{}\fi % section name
\XX$\langle\,$#2{\eightrm\kern.5em#1}$\,\rangle$\XX}
% \Y ends the paragraph and adds \yskip of vertical space
\def\Y{\par\yskip}
\let\Z=\let % now you can \send the control sequence \Z
\def\){\hbox{\.{@\$}}} % sign for string pool check sum
\def\]{\hbox{\.{@\\}}} % sign for forced line break
% \={...} sets its argument as a string surrounded by rules on all four sides
\def\=#1{\kern2pt\hbox{\vrule\vtop{\vbox{\hrule
\hbox{\strut\kern2pt\.{#1}\kern2pt}}
\hrule}\vrule}\kern2pt} % verbatim string
% from here on \~ ignores following spaces instead of producing a tilde accent
\let\~=\ignorespaces
% \ellipsis: an indented line containing vertical dots, ended by \par
\def\ellipsis{\kern5em\smash{\vdots}\qquad\vbox to12pt{}\par}
% \hang starts an unindented paragraph with a hanging indentation of 3em
\def\hang{\hangindent 3em\noindent\ignorespaces}
\prob 10. Since \\{prev\_depth} is initially \\{ignore\_depth}, we get
\begintt
### vertical mode entered at line 1 (\output routine)
prevdepth -999.99998, prevgraf 1 line
\endtt
\prob 11. According to \sec236, $\\{int\_base}+17$ is where \\{mag} is
stored. (One of the definitions suppressed by an ellipsis on page 101 is
\\{mag}; you can verify this by checking the index!) The initial value
of \\{mag} is set in \sec240. Hence \\{show\_eqtb} branches to \sec242 and
prints `|\mag=1000|'.
\begingroup
\def\|#1{{\sevenrm(#1)}}
\prob 12. In the following chart, `\|3' means a value at level three, and
`---' is a level boundary:
$$\advance\abovedisplayskip-6pt
\def\-{---} \chardef\{=`\{ \chardef\}=`\}
\def\:{\omit\tt\quad\char`}
\halign{\kern15pt\hfil#&&\hfil\hbox to0pt{\hss#\hss}\cr
&&&&&&&&&&&&&&&\|2\cr
&&&&&&&&&&&&&&&9\cr
&&&&&&&&&&\|1&\|1&&&\-&\-\cr
&&&&&&&&&&6&6&&\|1&\|1&\|1&\|1&\|1\cr
&&&&&&&&\-&\-&\-&\-&&8&8&8&8&8\cr
&&&&&&&\|1&\|1&\|1&\|1&\|1&\|1&\|1&\|1&\|1&\|1&\|1\cr
&&&&&&&4&4&4&4&4&4&4&4&4&4&4\cr
&&&&&\|1&\|1&\|1&\|1&\|1&\|1&\|1&\|1&\|1&\|1&\|1&\|1&\|1\cr
&&&&&2&2&2&2&2&2&2&2&2&2&2&2&2\cr
\\{save\_stack}:&&&&\-&\-&\-&\-&\-&\-&\-&\-&\-&\-&\-&\-&\-&\-\cr
\\{xeq\_level}$[p]$:&\|1&\|1&\|1&\|1&\|2&\|1&\|2&\|2&\|1&\|3&\|1&\|1&\|2&\|2&\|3&\|2&\|2&\|1\cr
\\{eqtb}$[p]$.\\{int}:&0&1&2&2&3&4&5&5&6&7&8&8&9&9&10&9&10&8\cr
operations:&\:\\day=0&\:\\g&\:\\a&\:\{\ &\:\\a&\:\\g&\:\\a&\:\{\ &
\:\\g&\:\\a&\:\\g&\:\}\ &\:\\a&\:\{\ &\:\\a&\:\}\ &\:\\a&\:\}\ \cr}$$
The final value is therefore |\day=8|.
\endgroup
{\def\,{\hskip2pt}
\prob 13. (reference count), \\{match}\,|!|, \\{match}\,|#|, \\{left\_brace}\,|[|,
\\{end\_match}, \\{left\_brace}\,|{|, \\{mac\_param}\,|#|, \\{right\_brace}\,|]|,
\\{mac\_param}\,|!|, \\{out\_param}\,2, \\{left\_brace}\,|[|. Notice that the
\\{left\_brace} before the \\{end\_match} is repeated at the end of the replacement
text, because it has been matched (and therefore removed from the input).}
\prob 14. According to \sec233, \\{show\_eqtb}(\\{every\_par\_loc}) calls
\\{show\_token\_list} with the limit~$l=32$. According to \sec292, we want
the token list to contain a token that prints as many characters as possible
when $\\{tally}=31$;
the value of \\{tally} is increased on every call to \\{print\_char} (\sec58).
By studying the cases in \sec294, we conclude that the worst case occurs
when a \\{mac\_param} is printed, and when the character~$c$ actually
prints as three characters. The statement `\\{print\_esc}(|"ETC."|)' in \sec292
will print seven additional
characters if the current \\{escape\_char} is another tripler. (Longer
examples are possible only if \TeX\ has a bug that tweaks one of the
outputs `|\CLOBBERED.|' or `|\BAD.|' in \sec293; but this can't happen.)
In other words, a worst-case example such as
\begintt
\escapechar=`\↑↑M \catcode`\↑↑I=6
\everypar{1234567890123456789012345678901↑↑Ietc.}
\endtt
in connection with the suggested test line will print
\begintt
{restoring ↑↑Meverypar=1234567890123456789012345678901↑↑I↑↑I↑↑METC.}
\endtt
thereby proving that 44 characters can be printed by
\\{show\_eqtb}(\\{every\_par\_loc}).
\prob 15. Here we must look at the \\{get\_next} procedure, which scans
the \\{buffer} in strange ways when two identical characters of
category~7 (\\{sup\_mark}) are found. After the |\catcode| of open-quote
has been set to~7, \\{get\_next} begins to scan a control sequence
in \sec354, which goes to \sec355 and finds a space after |``|. Since
a space is code \O{40}, it is changed to \O{140}, and the buffer
contents are shifted left~2. By strange coincidence, \O{140} is again
an open-quote character, so we get back to \sec355, which changes
|``(| to |h| and goes back to \\{start\_cs} a third time. Now we go
to \sec356 and then back to \sec355 and \\{start\_cs}, having changed
|``)| to |i|. The fourth round, similarly, changes |```| to a blank
space, and the fifth round finishes the control sequence.
If we try to input the stated line, |INITEX| will come to a halt as follows:
\begintt
! Undefined control sequence.
<*> \catcode``=7 \hi
!\error
\endtt
This proves that the \\{buffer} now says |\hi !|.
\prob 16. The error message in question is
\begintt
! Undefined control sequence.
<*> \endlinechar=`! \error
↑↑M
\endtt
and our job is to explain the appearance of |↑↑M|. The standard |\endlinechar|
is \\{carriage\_return}, according to \sec240; this is \O{15} according
to \sec22, and \O{15} is |↑↑M| in ASCII code.
Thus, a \\{carriage\_return} is normally placed at the end of
each line when it's read into the \\{buffer} (see \sec360).
This \\{carriage\_return}
is not usually printed in an error message, because it equals the
\\{end\_line\_char} (see \sec318). We see it now because \\{end\_line\_char}
has changed.
Incidentally, if the input line had been
\begintt
\endlinechar=`!\error
\endtt
(without the space after the |!|), we wouldn't have seen the |↑↑M|. Why not?
Because \TeX\ calls \\{get\_next} when looking for the optional space
after the ASCII constant |`!| (see \sec442--443),
hence the undefined control sequence |\error|
is encountered before \\{end\_line\_char} has been changed!
\prob 17. One problem is to figure out which control sequence is undefined;
it seems to be the `|?|', since this character has been made active.
One clue is to observe from \sec312 and \sec314 that `|<recently read>|'
can be printed only when $\\{base\_ptr}=\\{input\_ptr}$, $\\{state}=
\\{token\_list}$, $\\{token\_type}=\\{backed\_up}$, and $\\{loc}=\\{null}$.
A token list of type \\{backed\_up} usually contains only a single item;
in that case, the control sequence name must be `|How did this happen?|',
and we have a problem getting an active character into a control sequence name.
But an arbitrarily long token list of type \\{backed\_up} can be created with the
|\lowercase| operation (see \sec1288). In that case, however, the right brace
that closes |\lowercase| is almost always still present in \TeX's input
state, and it would show up on the error message. (The \\{back\_list}
procedure of \sec323 does not clear a completed token list off of the stack.)
We have to make \TeX\ clear off its stack before the |}| is scanned.
At this point the exercise begins to resemble ``retrograde chess'' problems.
Here is one solution; since it requires a very long input line, it has been
broken into a three-line answer:
\begintt
\def\answer{\let~\expandafter\lccode`!=`H% [line has been broken]
~\lowercase~{~!~o~w~ ~d~i~d~ ~t~h~i~s~ % [line has been broken]
~h~a~p~p~e~n~?}}
\endtt
(The `|H|' is a lowercase `|!|';
a chain of\/ |\expandafter|'s is used to make the right brace disappear from
the stack.)
Another approach uses |\csname|, and manufactures a |?| from a |!|:
\begintt
\def\answer{\def\a##1{{\global\let##1?\aftergroup##1}}% [broken]
\escapechar`H\lccode`!|/`? % [broken]
\lowercase{\expandafter\a\csname ow did this happen!\endcsname}}
\endtt
But there is a (devious) one-line solution, which makes the
invisible \\{carriage\_return} following |\answer| into a right brace:
\begintt
\def\answer{\catcode13=2\lccode`!=`H\lowercase\bgroup!ow did this happen?}
\endtt
\prob 18. (The answer to this problem was much more difficult to explain
in class than I had thought it would be, so I guess it was also much
more difficult for the students to solve than I had thought it would be.
After my first attempt to explain the answer,
I decided to make up a special version of \TeX\ that would help to
clarify the scanning routines. This special program, called Demo\TeX,
is just like ordinary
\TeX\ except that if\/ |\tracingstats>2| the user is able to watch \TeX's
syntax routines in slow motion. The changes that convert \TeX\ to
Demo\TeX\ are explained in the appendix below.
Given Demo\TeX, we tried a lot of simple
examples of things like `|\hfuzz=1.5pt|' and `|\catcode`a=11|' before
plunging into exercise~18 in which everything happens at once.
While we were discussing input stacks, by the way, we found it
helpful to consider the behavior of \TeX\ on the following input:
\begintt
\output{\botmark}
\def\a{\error}
\mark{
\everyvbox{
\everypar{
\everydisplay{
\everyhbox{
\everymath{\noexpand\a}
$\relax}
\hbox\bgroup\relax}
$$\relax}
\noindent\relax}
\vbox\bgroup\relax}
\hbox{}\vfill\penalty-10000
\endtt
Here |\penalty| triggers |\botmark|, which defines |\everyvbox| and begins
a |\vbox|, which defines |\everypar| and begins a |\par|, which defines
|\everydisplay| and begins a |\display|, etc.)
The first line is essentially `|\gdef\a#1d#2#3{#2}|', where the second `|d|'
has catcode~12 (\\{other\_char}). Hence~the second |d| will match a~|d| that
is generated by |\romannumeral|. In this line, \\{scan\_int} is called only
to scan the |`d| and the |12|.
The second line calls \\{scan\_dimen} in order to evaluate the right-hand
side of the assignment to |\hfuzz|. After \\{scan\_dimen} has used
\\{scan\_int} to read the `|100|', it calls \\{scan\_keyword} in order to
figure out the units. But before the units are known to be `|pt|' or `|pc|',
an |\ifdim| must be expanded. Here we need to call \\{scan\_dimen}
recursively, twice; it finds the value 12$\,$pt on the left-hand side,
and is interrupted again while \\{scan\_keyword} is trying to figure out
the units on the right-hand side. Now a chain of\/ |\expandafter|'s causes
|\romannumeral888| to be expanded into |dccclxxxviii|, and then we have
to parse |\a dccclxxxviii|. Here |#1| will be |\else|, |#2| and |#3| will
each be |c|; the expansion therefore reduces to |cclxxxviii\relax\fi|.
The first `|c|' completes the second `|Pc|', and the |\ifdim| test is true.
Therefore the second `|c|' can complete the first `|Pc|', and |\hfuzz|
is set equal to 1200$\,$pt. The characters |lxxxviii| now begin a
paragraph. The |\fi| takes the |\ifdim| out of \TeX's condition stack.
(The appendix below gives further information.
Examples like this give some glimmering of the weird maneuvers that can be
found in the |TRIP| test, an intricate pattern of unlikely code that is
used to validate all implementations of \TeX.)
\prob 19. If, for example, |\thickmuskip| has the value |5mu plus 5mu|
that plain \TeX\ gives it, the first command changes its value to
|-5mu plus -5mu|, because \\{scan\_glue} in \sec461 will call
\\{scan\_something\_internal} with the second argument \\{true}; this
will cause all three components of the glue to be negated (see \sec431).
The second command, on the other hand, tells \TeX\ to expand `|\the\thickmuskip|'
into a sequence of characters, so it is equivalent to
\begintt
\thickmuskip=-5mu plus 5mu
\endtt
(The minus sign doesn't carry into the stretch component of glue, since
\sec461 applies \\{negate} only to the first dimension found.)
This problem points out a well-known danger that is present in any
text-macro-expanding system.
\prob 20. We'd have a funny result that two macro texts would be considered
to match by |\ifx| unless the first one (the one starting at~$q$ when we
begin \sec508) is a proper prefix of the second. (Notice the statement
`$p\K\\{null}$' inside the {\bf while} loop.)
\prob 21. Because the byte in \\{dvi\_buf}[$\\{dvi\_ptr}-1$] is usually not
an operation code, and it just might happen to equal \\{push}.
\prob 22. $2_y\,7_d\,1_d\,8_z\,2_y\,8_z\,1_d\,8_z\,2_y\,8_z\,4_y\,5_z\,
9_d\,0_d\,4_y\,5_z$.
\prob 23. \TeX\ is in `no mode' only while processing |\write| statements,
and the mode is printed during |\write| only when
$\\{tracing\_commands}>1$ during \\{expand}. We might think that
|\catcode| operations are necessary, so that the left and right
braces for |\write| exist; but it's possible to let \TeX's error-recovery
mechanism supply them! Therefore the shortest program that meets the
requirements is probably the following one based on an idea
due to Ronaldo Am\'a, who suggests putting
\begintt
\batchmode\tracingcommands2\immediate\write!\nomode
\endtt
into a file. (Seven tokens total.)
\prob 24. When \\{error} calls \\{get\_token}, because the user has asked
for tokens to be deleted (see \sec88), a second level of \\{error} is
possible, but further deletions are disallowed (see \sec336 and \sec346).
However, insertions are still allowed, and this can lead to a third
level of \\{error} when \\{overflow} calls \\{succumb}.
For example, let's assume that $\\{max\_in\_open}=6$. Then you can type
`|\catcode`?=15 \x|' and respond to the undefined control sequence error
by saying `|i\x??|' six times. This leads to a call of \\{error} in which
six `|<insert>|' levels appear; hence $\\{in\_open}=6$, and one more insertion
will be the last straw. At this point, type `|1|'; this enters \\{error}
at a second level, from which `|i|' will enter \\{error} a third time.
(The run-time stack now has \\{main\_control} calling \\{get\_x\_token}
calling \\{expand} calling \\{error} calling \\{get\_token} calling
\\{get\_next} calling \\{error} calling \\{begin\_file\_reading} calling
\\{overflow} calling \\{error}.)
\prob 25. In \sec38, define \\{str\_number} to be the same as \\{pool\_pointer},
and define $\\{str\_end}=128$.
In \sec39, delete the declaration of \\{str\_start}.
In \sec40, declare
\Y\P\4\&{function}\1\ \37$\\{length}(\|s:\\{str\_number})$: \37\\{integer};\6
\4\&{var} \37\|t: \37\\{pool\_pointer};\2\6
\&{begin} \37$\|t\K\|s$;\6
\&{while} $\\{str\_pool}[\|t]\I\\{str\_end}$ \1\&{do}\5
$\\{incr}(\|t)$;\2\6
$\\{length}\K\|t-\|s$;\6
\&{end};\par
\Y\Q
\noindent
In \sec41, define $\\{cur\_length}\S(\\{pool\_ptr}-\\{str\_ptr})$. In %
\sec43, declare
\Y\P\4\&{function}\1\ \37\\{make\_string}: \37\\{str\_number};\C{current
string enters the pool}\6
\4\&{var} \37\|t: \37\\{str\_number};\C{the result}\2\6
\&{begin} \37$\\{str\_room}(1)$;\5
$\\{append}(\\{str\_end})$;\6
$\|t\K\\{str\_ptr}$;\5
$\\{str\_ptr}\K\\{pool\_ptr}$;\5
$\\{make\_string}\K\|t$;\6
\&{end};\par
\Y\Q
\noindent
In \sec44, we can
\Y\P\D \37$\\{flush\_string}\S$\ \&{begin} \37\1\&{repeat} \37$\\{decr}(\\{str%
\_ptr})$;\6
\4\&{until}\5
$\\{str\_pool}[\\{str\_ptr}-1]=\\{str\_end}$;\2\6
$\\{pool\_ptr}\K\\{str\_ptr}$;\6
\&{end}\par
\Y\Q
The comparison function in \sec45 is used only in \sec259, where we can
replace `\&{if} $\\{length}(\\{text}(\|p))=\|l$ \&{then} \&{if} $\\{str\_eq%
\_buf}(\\{text}(\|p),\|j)$' by
`\&{if} $\\{str\_eq\_buf}(\\{text}(\|p),\|j,\|l)$'. The function now has
three parameters:
\Y\P\4\&{function}\1\ \37$\\{str\_eq\_buf}(\|s:\\{str\_number};\,\35\|k,\39%
\|l:\\{integer})$: \37\\{boolean};\C{test equality of strings}\6
\4\&{label} \37\\{exit};\6
\4\&{var} \37\|j: \37\\{pool\_pointer};\C{running index}\2\6
\&{begin} \37$\|j\K\|s$;\5
$\|s\K\|s+\|l$;\6
\&{if} $\\{str\_pool}[\|s]\I\\{str\_end}$ \1\&{then}\5
$\\{str\_eq\_buf}\K\\{false}$\6
\4\&{else} \&{begin} \37\&{while} $\|j<\|s$ \1\&{do}\6
\&{begin} \37\&{if} $\\{str\_pool}[\|j]\I\\{buffer}[\|k]$ \1\&{then}\6
\&{begin} \37$\\{str\_eq\_buf}\K\\{false}$;\5
\\{return};\5
\&{end};\2\6
$\\{incr}(\|j)$;\5
$\\{incr}(\|k)$;\6
\&{end};\2\6
$\\{str\_eq\_buf}\K\\{true}$;\6
\&{end};\2\6
\4\\{exit}: \37\&{end};\par
\Y\Q
\noindent
The procedure of \sec46 is modified in an obvious, similar way.
The first three statements of \sec47 become just two:
`$\\{pool\_ptr}\K128$; $\\{str\_ptr}\K128$'. The body of the {\bf for} loop in %
\sec48
becomes just
\Y\P\&{if} $(\X49:Character \|k cannot be printed\X)$ \1\&{then}\6
\&{if} $\|k<\O{100}$ \1\&{then}\5
$\\{str\_pool}[\|k]\K\|k+\O{100}$\6
\4\&{else} $\\{str\_pool}[\|k]\K\|k-\O{100}$\2\6
\4\&{else} $\\{str\_pool}[\|k]\K\|k$\2\par
\Y\Q
In \sec59, variable $j$ is no longer needed. If $0\L\|s<128$ and if $s$
isn't the current new-line character, we now say
\Y\P\&{begin} \37\&{if} $\\{str\_pool}[\|s]\I\|s$ \1\&{then}\6
\&{begin} \37$\\{print\_char}(\.{"\↑"})$;\5
$\\{print\_char}(\.{"\↑"})$;\6
\&{end};\2\6
$\\{print\_char}(\\{str\_pool}[\|s])$;\6
\&{end}\par
\Y\Q
\noindent
In the other case, where $\|s\G128$, we say
\Y\P\&{while} $\\{str\_pool}[\|s]\I\\{str\_end}$ \1\&{do}\6
\&{begin} \37$\\{print\_char}(\\{str\_pool}[\|s])$;\5
$\\{incr}(\|s)$;\6
\&{end}\2\par
\Y\Q
\noindent
In \sec407, similarly, variable $k$ is eliminated; the loop on~$k$
becomes a loop on~$s$, \&{while} $\\{str\_pool}[\|s]\I\\{str\_end}$.
In \sec464, replace the two occurrences of `$\\{str\_start}[\\{str\_ptr}]$' by
`\\{str\_ptr}'.
The first loop in \sec603 becomes
\Y\P$\|k\K\\{font\_area}[\|f]$;\6
\&{while} $\\{str\_pool}[\|k]\I\\{str\_end}$ \1\&{do}\6
\&{begin} \37$\\{dvi\_out}(\\{str\_pool}[\|k])$;\5
$\\{incr}(\|k)$;\6
\&{end}\2\par
\Y\Q
\noindent and the second is like unto it.
\prob 26. Let's assume that we have a machine in which \\{str\_pool} is
addressed by byte number, so that 8-bit values take no more space than
7-bit values. Method~(a) requires us to impose a limit on the length
of strings: 255 characters max. This isn't unreasonable, because the only
important use of longer strings is in the implementation of\/ |\special|,
when the restriction doesn't actually apply (since \sec1368 doesn't
call \\{make\_string}). But method~(a) saves no space and little or no time
by comparison with the simpler method of problem~25. Problem~25 saves
about one byte per string, compared to the text's way. Method~(b) saves
another byte per string but at the expense of considerable programming
complexity; it requires awkward special-casing to deal with empty strings.
\prob 27. We'd replace `\\{width}$(g)$' by `$\\{width}(g)+
\\{shift\_amount}(g)$' (twice). Similar changes would be needed in \sec656.
(But a box shouldn't be able to retain its \\{shift\_amount}; this quantity
is a property of the list the box is in, not a property of the box itself.)
\prob 28. The final line has infinite stretchability, since plain \TeX\
sets |\parfillskip=0pt plus 1fil|. Reports of loose, tight, underfull,
or overfull boxes are never made unless $o=\\{normal}$ in \sec658 and \sec664.
\prob 29. If a vbox is repackaged as an hbox, we get really weird results
because things that were supposed to stack up vertically are placed together
horizontally. The second change would be a lot less visible, except in
characters like $V$ where there is a large italic correction; the character
would be centered without taking its italic correction into account.
(The italic correction in math mode is the difference between horizontal
placement of superscripts and subscripts in formulas like $V_2↑2$.)
\prob 30. The spacing can be found by saying
\begintt
$x==1$ $x++1$ $x,,1$ \tracingall\showlists.
\endtt
Most of the decisions are made in \sec766, using the spacing table of\/
\sec764. But the situation is trickier in the case of~|+|, because a
\\{bin\_noad} must be preceded and followed by a noad of a suitable class.
In the formula |$x++1$|, the second~|+| is changed from \\{bin\_noad} to
\\{ord\_noad} in \sec728. It turns out that thick spaces are inserted after the~$x$
and before the~1 in `$x==1$'; medium spaces are inserted before each~$+$ sign in
`$x++1$'; thin spaces are inserted after each comma in `$x,,1$'.
\prob 31. The behavior of the simpler algorithm, which we may call Brand~X,
can be deduced from the demerits values (`|d=|') in the trace output.
There is only one reasonable choice, |@@1|, for the first line; and there's
only one, |@@2|, for the second. But for the third, a line from |@@2| to
|@@3| (the break after `|para-|') has 46725 demerits, which certainly looks
worse than the 1225 demerits from |@@2| to |@@4|. This, however, leads Brand~X
into a trap, since there's no good way to continue from |@@4|. Similarly,
Brand~X will choose to go from |@@7| to |@@9|, and this forces it to
|@@11| and then infelicitously to |@@13| (because the syllable `|break-|'
is too long to be squeezed in). The resulting paragraph, as typeset by
Brand~X, looks like this (awful):
$$\vbox{\hbadness=2000
\hsize=3.18in
\pretolerance=-1
\prob 31. When your instructor made up this problem, he said
`|\tracingparagraphs=1|' so that his transcript file would explain why
\TeX\ has broken the paragraph\break into lines in a particular way. He also said\break
`|\pretolerance=-1|' so that hyphenation would be tried immediately.
The output is shown on the next page; use it to determine
what line breaks would have\break been found by a simpler algorithm
that breaks one line at a time. (The simpler algorithm finds the
breakpoint that yields fewest demerits on the first line,
then chooses it and starts over again.)}$$
\prob 32. (This exercise takes awhile, but the data structures are especially
interesting; the hyphenation algorithm is a nice little part of the program
that can be studied in isolation.) The following tables are constructed:
$$\setbox0=\hbox{]} \def\]{\kern-\wd0}
\vbox{\halign{&\kern20pt\hfil#\quad&\hfil#\quad&\hfil#\quad&\hfil#\qquad\cr
&\\{op}\]&\\{char}\]&\\{link}\]&&[1]\]&[2]\]&[3]\]\cr
\noalign{\vskip2pt}
\\{trie}[96]&0&96&1&\\{hyf\_distance}&2&0&3\cr
\\{trie}[97]&0&97&5&\\{hyf\_num}&1&3&2\cr
\\{trie}[98]&0&97&2&\\{hyf\_next}&0&0&2\cr
\\{trie}[100]&1&98&3\cr
\\{trie}[102]&1&99&4\cr
\\{trie}[103]&0&98&6\cr
\\{trie}[105]&3&99&4\cr}}$$
Given the word |aabcd|, it is interesting to watch \sec923 produce the
hyphenation numbers
`{\def\\#1{$_{\kern\scriptspace#1}$}\tt\\0a\\0a\\2b\\1c\\0d\\3}' from this trie.
\prob 33. The idea is to keep line numbers on the save stack. Scott Douglass
has observed that, although \TeX\ is careful to keep \\{cur\_boundary} up to date,
nothing important is ever done with it; hence the \\{save\_index} field in
level-boundary words is not needed, and we have an extra halfword to play with!
(The present data structure has fossilized elements left over from old
incarnations of \TeX.)
However, line numbers might get larger than a halfword; it seems better
to store them as fullword integers.
This problem requires changes to three parts of the program. First, we can
extend \sec1063 as follows:
\Y\P$\4\X1056:Cases of \\{main\_control} that build boxes and lists\X%
\mathrel{+}\S$\6
\4$\\{non\_math}(\\{left\_brace})$: \37\&{begin} \37$\\{saved}(0)\K\\{line}$;\5
$\\{incr}(\\{save\_ptr})$;\5
$\\{new\_save\_level}(\\{simple\_group})$;\6
\&{end};\C{the line number is saved for possible use in warning message}\6
\4$\\{any\_mode}(\\{begin\_group})$: \37\&{begin} \37$\\{saved}(0)\K\\{line}$;\5
$\\{incr}(\\{save\_ptr})$;\5
$\\{new\_save\_level}(\\{semi\_simple\_group})$;\6
\&{end};\6
\4$\\{any\_mode}(\\{end\_group})$: \37\&{if} $\\{cur\_group}=\\{semi\_simple%
\_group}$ \1\&{then}\6
\&{begin} \37\\{unsave};\5
$\\{decr}(\\{save\_ptr})$;\C{pop unused line number from stack}\6
\&{end}\6
\4\&{else} \\{off\_save};\2\par
\Y\Q\noindent
A similar change is needed in \sec1068, where the first case becomes
\Y\P\4\\{simple\_group}: \37\&{begin} \37\\{unsave};\5
$\\{decr}(\\{save\_ptr})$;\C{pop unused line number from stack}\6
\&{end};\par
\Y\Q
Finally, we replace lines 6--11 of \sec1335 by code for the desired messages:
\Y\P
\&{while} $\\{cur\_level}>\\{level\_one}$ \1\&{do}\6
\&{begin} \37$\\{print\_nl}(\.{"("})$;\5
$\\{print\_esc}(\.{"end\ occurred\ when\ "})$;\6
\&{case} $\\{cur\_group}$ \1\&{of}\6
\4\\{simple\_group}: \37$\\{print\_char}(\.{"\{"})$;\6
\4\\{semi\_simple\_group}: \37$\\{print\_esc}(\.{"begingroup"})$;\6
\4\&{othercases} \37$\\{confusion}(\.{"endgroup"})$\2\6
\&{endcases};\6
$\\{print}(\.{"\ on\ line\ "})$;\5
\\{unsave};\5
$\\{decr}(\\{save\_ptr})$;\5
$\\{print\_int}(\\{saved}(0))$;\5
$\\{print}(\.{"\ was\ incomplete)"})$;\6
\&{end};\2\6
\&{while} $\\{cond\_ptr}\I\\{null}$ \1\&{do}\6
\&{begin} \37$\\{print\_nl}(\.{"("})$;\5
$\\{print\_esc}(\.{"end\ occurred\ when\ "})$;\5
$\\{print\_cmd\_chr}(\\{if\_test},\39\\{cur\_if})$;\par
\Y\Q
\def\TeXX{\TeX\kern-.3emX}
\prob 34. First, \sec2 gets a new paragraph explaining what \TeXX\ is, and the
banner line changes:
\Y\P\D \37$\\{banner}\S\.{\'This\ is\ TeXX,\ Version\ 2.2\'}$\C{printed when %
\TeX\ starts}\par\Y\Q\noindent
Then we add two new definitions in \sec134:
\Y\P\D \37$\\{is\_xchar\_node}(\#)\S(\\{font}(\#)=\\{font\_base})$\C{is this %
\\{char\_node} extended?}\par
\P\D \37$\\{bypass\_xchar}(\#)\S$\1\6
\&{if} $\\{is\_xchar\_node}(\#)$ \1\&{then}\5
$\#\K\\{link}(\#)$\2\2\par
\Y\Q\noindent (It's necessary to say \\{font\_base} here instead of
\\{null\_font}, because \\{null\_font} isn't defined until later.)
The \\{short\_display} routine of \sec174 can treat an |\xchar| like
an ordinary character, because \\{print\_ASCII} makes no restrictions. Here is
one way to handle the change:
\Y\P\4\&{procedure}\1\ \37$\\{short\_display}(\|p:\\{integer})$;\C{prints
highlights of list \|p}\6
\4\&{label} \37\\{done};\6
\4\&{var} \37\|n: \37\\{integer};\C{for replacement counts}\6
\\{ext}: \37\\{integer};\C{amount added to character code by xchar}\2\6
\&{begin} \37$\\{ext}\K0$;\6
\&{while} $\|p>\\{mem\_min}$ \1\&{do}\6
\&{begin} \37\&{if} $\\{is\_char\_node}(\|p)$ \1\&{then}\6
\&{begin} \37\&{if} $\|p\L\\{mem\_end}$ \1\&{then}\6
\&{begin} \37\&{if} $\\{is\_xchar\_node}(\|p)$ \1\&{then}\6
\&{begin} \37$\\{ext}\K256\ast(\\{qo}(\\{character}(\|p)))$;\5
\&{goto} \37\\{done};\6
\&{end};\2\6
\&{if} $\\{font}(\|p)\I\\{font\_in\_short\_display}$ \1\&{then}\6
\&{begin} \37\&{if} $(\\{font}(\|p)<\\{font\_base})\V(\\{font}(\|p)>\\{font%
\_max})$ \1\&{then}\5
$\\{print\_char}(\.{"*"})$\6
\4\&{else} \X267:Print the font identifier for $\\{font}(\|p)$\X;\2\6
$\\{print\_char}(\.{"\ "})$;\5
$\\{font\_in\_short\_display}\K\\{font}(\|p)$;\6
\&{end};\2\6
$\\{print\_ASCII}(\\{ext}+\\{qo}(\\{character}(\|p)))$;\5
$\\{ext}\K0$;\6
\&{end};\2\6
\&{end}\6
\4\&{else} \X175:Print a short indication of the contents of node \|p\X;\2\6
\4\\{done}: \37$\|p\K\\{link}(\|p)$;\6
\&{end};\2\6
\&{end};\par
\Y\Q\noindent
A somewhat similar change applies in \sec176:
\Y\P\4\&{procedure}\1\ \37$\\{print\_font\_and\_char}(\|p:\\{integer})$;%
\C{prints \\{char\_node} data}\6
\4\&{label} \37\\{reswitch};\6
\4\&{var} \37\\{ext}: \37\\{integer};\C{amount added to character code by
xchar, or $-1$}\2\6
\&{begin} \37$\\{ext}\K-1$;\6
\4\\{reswitch}: \37\&{if} $\|p>\\{mem\_end}$ \1\&{then}\5
$\\{print\_esc}(\.{"CLOBBERED."})$\6
\4\&{else} \&{begin} \37\&{if} $\\{is\_xchar\_node}(\|p)$ \1\&{then}\6
\&{begin} \37$\\{ext}\K\\{qo}(\\{character}(\|p))$;\5
$\|p\K\\{link}(\|p)$;\5
\&{goto} \37\\{reswitch};\5
\&{end};\2\6
\&{if} $(\\{font}(\|p)<\\{font\_base})\V(\\{font}(\|p)>\\{font\_max})$ \1%
\&{then}\5
$\\{print\_char}(\.{"*"})$\6
\4\&{else} \X267:Print the font identifier for $\\{font}(\|p)$\X;\2\6
$\\{print\_char}(\.{"\ "})$;\6
\&{if} $\\{ext}<0$ \1\&{then}\5
$\\{print\_ASCII}(\\{qo}(\\{character}(\|p)))$\6
\4\&{else} \&{begin} \37$\\{print\_esc}(\.{"xchar"})$;\5
$\\{print\_hex}(\\{ext}\ast256+\\{qo}(\\{character}(\|p)))$;\6
\&{end};\2\6
\&{end};\2\6
\&{end};\par
\Y\Q\noindent
(These routines must be extra-robust.)
The first line of code in \sec183 now becomes
\Y\P\&{if} $\\{is\_char\_node}(\|p)$ \1\&{then}\6
\&{begin} \37$\\{print\_font\_and\_char}(\|p)$;\5
$\\{bypass\_xchar}(\|p)$;\6
\&{end}\par\Y\Q
In \sec208 we introduce a new operation code,
\Y\P\D \37$\\{xchar\_num}=17$\C{extended character ( \.{\\xchar} )}\par
\Y\Q\noindent
Every opcode that follows it
in \sec208 and \sec209, from \\{math\_char\_num} to
\\{max\_command}, must be increased by~1.
We also add the following lines to \sec265 and \sec266, respectively:
\Y\P$\\{primitive}(\.{"xchar"},\39\\{xchar\_num},\390)$;\par
\P\4\\{xchar\_num}: \37$\\{print\_esc}(\.{"xchar"})$;\par
\Y\Q\noindent
This puts the new command into \TeXX's repertoire.
The next thing we need to worry about is what to do when |\xchar| occurs
in the input.
It's convenient to add a companion procedure to \\{scan\_char\_num} in \sec435:
\Y\P\4\&{procedure}\1\ \37\\{scan\_xchar\_num};\2\6
\&{begin} \37\\{scan\_int};\6
\&{if} $(\\{cur\_val}<0)\V(\\{cur\_val}>65535)$ \1\&{then}\6
\&{begin} \37$\\{print\_err}(\.{"Bad\ character\ code"})$;\5
$\\{help2}(\.{"An\ \\xchar\ number\ must\ be\ between\ 0\ and\ 65535."})$\6
$(\.{"I\ changed\ this\ one\ to\ zero."})$;\5
$\\{int\_error}(\\{cur\_val})$;\5
$\\{cur\_val}\K0$;\6
\&{end};\2\6
\&{end};\par\Y\Q\noindent
Similarly, \\{new\_character} gets a companion in \sec582:
\Y\P\4\&{function}\1\ \37$\\{new\_xchar}(\|f:\\{internal\_font\_number};\,\35\|c:%
\\{integer})$: \37\\{pointer};\6
\4\&{var} \37$\|p,\39\|q$: \37\\{pointer};\C{newly allocated nodes}\2\6
\&{begin} \37$\|q\K\\{new\_character}(\|f,\39\|c\mathbin{\&{mod}}256)$;\6
\&{if} $\|q=\\{null}$ \1\&{then}\5
$\\{new\_xchar}\K\\{null}$\6
\4\&{else} \&{begin} \37$\|p\K\\{get\_avail}$;\5
$\\{font}(\|p)\K\\{font\_base}$;\5
$\\{character}(\|p)\K\\{qi}((\|c\mathbin{\&{div}}256))$;\5
$\\{link}(\|p)\K\|q$;\5
$\\{new\_xchar}\K\|p$;\6
\&{end};\2\6
\&{end};\par
\Y\Q
\goodbreak
Extended characters can be output properly if we replace the opening lines
of the code in \sec620 by these:
\Y\P\4\\{reswitch}: \37\&{if} $\\{is\_char\_node}(\|p)$ \1\&{then}\6
\&{begin} \37\\{synch\_h};\5
\\{synch\_v};\6
\1\&{repeat} \37\&{if} $\\{is\_xchar\_node}(\|p)$ \1\&{then}\6
\&{begin} \37$\|f\K\\{font}(\\{link}(\|p))$;\6
\&{if} $\\{character}(\|p)=\\{qi}(0)$ \1\&{then}\5
$\|p\K\\{link}(\|p)$;\C{bypass zero extension}\2\6
\&{end}\6
\4\&{else} $\|f\K\\{font}(\|p)$;\2\6
$\|c\K\\{character}(\|p)$;\6
\&{if} $\|f\I\\{dvi\_f}$ \1\&{then}\5
\X621:Change font \\{dvi\_f} to \|f\X;\2\6
\&{if} $\\{is\_xchar\_node}(\|p)$ \1\&{then}\6
\&{begin} \37$\\{dvi\_out}(\\{set1}+1)$;\5
$\\{dvi\_out}(\\{qo}(\|c))$;\5
$\|p\K\\{link}(\|p)$;\5
$\|c\K\\{character}(\|p)$;\6
\&{end}\6
\4\&{else} \&{if} $\|c\G\\{qi}(128)$ \1\&{then}\5
$\\{dvi\_out}(\\{set1})$;\2\2\6
$\\{dvi\_out}(\\{qo}(\|c))$;\par
\Y\Q
Many of the processing routines include a statement of the form `$f\K\\{font}(\#)$',
which we want to do only after bypassing the first half of an extended character.
This can be done by inserting the following statements:
$$\advance\baselineskip2pt
\vbox{\halign{\\{bypass\_xchar}(#) \hfil&in \sec#\hfil\cr
$p$&654;\cr
$s$&842;\cr
\\{cur\_p}&867;\cr
$s$&871;\cr
$p$&1147.\cr}}$$
In \sec841 we need to do a little more than a simple bypass:
\Y\P\&{if} $\\{is\_char\_node}(\|v)$ \1\&{then}\6
\&{begin} \37\&{if} $\\{is\_xchar\_node}(\|v)$ \1\&{then}\6
\&{begin} \37$\|v\K\\{link}(\|v)$;\5
$\\{decr}(\|t)$;\C{an xchar counts as two chars}\6
\&{end};\par
\Y\Q
Two changes are needed in order to suppress hyphenation in words that contain
extended characters. First we insert
\Y\P\&{if} $\\{hf}=\\{font\_base}$ \1\&{then}\5
\&{goto} \37\\{done1};\C{$\\{is\_xchar\_node}(\|s)$}\par\Y\Q
\noindent after the third line of \sec896. Then we replace `{\bf endcases};'
in \sec899 by
\Y\P\&{endcases}\6
\4\&{else} \&{if} $\\{is\_xchar\_node}(\|s)$ \1\&{then}\5
\&{goto} \37\\{done1};\par\Y\Q
If\/ |\xchar| appears in math mode, we want to recover from the error by
including $\\{mmode}+\\{xchar\_num}$ in the list of cases in \sec1046.
If\/ |\xchar| appears in vertical mode, we want to begin a paragraph by
including $\\{vmode}+\\{xchar\_num}$ in the second list of cases in \sec1090.
\Y
But what if |\xchar| appears in horizontal mode? To handle this, we might
as well rewrite \sec1122:
\M1122. We need only two more things to complete the horizontal mode
routines, namely
the \.{\\xchar} and \.{\\accent} primitives.
\Y\P$\4\X1056:Cases of \\{main\_control} that build boxes and lists\X%
\mathrel{+}\S$\6
\4$\\{hmode}+\\{xchar\_num}$: \37\&{begin} \37\\{scan\_xchar\_num};\5
$\\{link}(\\{tail})\K\\{new\_xchar}(\\{cur\_font},\39\\{cur\_val})$;\6
\&{if} $\\{link}(\\{tail})\I\\{null}$ \1\&{then}\5
$\\{tail}\K\\{link}(\\{link}(\\{tail}))$;\2\6
$\\{space\_factor}\K1000$;\6
\&{end};\6
\4$\\{hmode}+\\{accent}$: \37\\{make\_accent};\par
\fi
\goodbreak
% \ellipsis: one output line holding an indented (50pt) \vdots; it is used
% inside a program listing to stand for lines that are not shown.
% The \vdots is \smash'ed and an empty 12pt-tall \vbox is appended, so the
% line's height comes from that box; \par ends the line.
\def\ellipsis{\kern50pt\smash{\vdots}\qquad\vbox to12pt{}\par}
\Y\Q Finally, we need to extend \\{make\_accent} so that extended characters
can be accented. (Problem~34 didn't call for this explicitly, but \TeXX\
should surely do it.) This means adding a new case in \sec1124:
\Y\P\4\&{else} \&{if} $\\{cur\_cmd}=\\{xchar\_num}$ \1\&{then}\6
\&{begin} \37\\{scan\_xchar\_num};\5
$\|q\K\\{new\_xchar}(\|f,\39\\{cur\_val})$;\6
\&{end}\par
\Y\Q\noindent
and making changes at the beginning and end of \sec1125:
\Y\P$\4\X1125\*:Append the accent with appropriate kerns, then set $\|p\K\|q$\X%
\S$\6
\&{begin} \37$\|t\K\\{slant}(\|f)/\\{float\_constant}(65536)$;\6
\&{if} $\\{is\_xchar\_node}(\|q)$ \1\&{then}\5
$\|i\K\\{char\_info}(\|f)(\\{character}(\\{link}(\|q)))$\6
\4\&{else} $\|i\K\\{char\_info}(\|f)(\\{character}(\|q))$;\2\6
$\|w\K\\{char\_width}(\|f)(\|i)$;\6
\ellipsis\6
$\\{subtype}(\\{tail})\K\\{acc\_kern}$;\5
$\\{link}(\|p)\K\\{tail}$;\6
\&{if} $\\{is\_xchar\_node}(\|q)$ \1\&{then}\C{in this case we want to bypass
the xchar part}\6
\&{begin} \37$\\{tail\_append}(\|q)$;\5
$\|p\K\\{link}(\|q)$;\6
\&{end}\6
\4\&{else} $\|p\K\|q$;\2\6
\&{end}\par
\Q
\prob 35. The main reason for preferring the method of problem 34 is that
the italic correction operation (\sec1113) would be extremely difficult with
the other scheme. Other advantages are: (a)~Division by 256 is needed only once;
\TeXX's main loops remain fast. (b)~Comparatively few changes from \TeX\
itself are needed, hence other ripoffs of \TeX\ can easily incorporate the
same ideas. (c)~Since fonts don't need to be segregated into `oriental'
and `occidental', |\xchar| has wide applicability. For example, it gives
users a way to suppress ligatures and kerns; it allows large fonts to have
efficient
256-character subsets of commonly-used characters. (d)~The conventions of
\TeXX\ match those of the |GF| files produced by {\logo METAFONT}.
The only disadvantage of the \TeXX\ method is that it requires all characters
whose codes differ by multiples of~256 to have the same box size. But this
is a minor consideration.
\bigbreak
\noindent{\bf Appendix}
\nobreak\smallskip\noindent
The solution to problem 18 refers to a special version of \TeX\ called
Demo\TeX, which allows users to see more details of the scanning process.
Demo\TeX\ is formed by making a few changes to parts 24--26 of \TeX.
First, in \sec341, the following code is placed between `\\{exit}:'\ and
`{\bf end}':
\Y\P
\37\&{if} $\\{tracing\_stats}>2$ \1\&{then}\6
\&{begin} \37$\|k\K\\{trace\_depth}$;\5
$\\{print\_nl}(\.{""})$;\6
\&{while} $\|k>0$ \1\&{do}\6
\&{begin} \37$\\{print}(\.{"\ "})$;\5
$\\{decr}(\|k)$;\6
\&{end};\2\6
$\\{print}(\.{"\|"})$;\5
$\\{print\_char}(\.{"\ "})$;\6
\&{if} $\\{cur\_cs}>0$ \1\&{then}\6
\&{begin} \37$\\{print\_cs}(\\{cur\_cs})$;\5
$\\{print\_char}(\.{"="})$;\6
\&{end};\2\6
$\\{print\_cmd\_chr}(\\{cur\_cmd},\39\\{cur\_chr})$;\6
\&{end};\par
\Y\Q\noindent
(A new global variable, \\{trace\_depth}, is declared somewhere and initialized
to zero. It is used to indent the output of Demo\TeX\ so that the depth of
subroutine nesting is displayed.)
At the beginning of \\{expand} (in \sec366), we put the statements
\Y\P
$\\{incr}(\\{trace\_depth})$;\6
\&{if} $\\{tracing\_stats}>2$ \1\&{then}\5
$\\{print}(\.{"\ <x"})$;\par
\Y\Q\noindent
this prints `|<x|' when \\{expand} begins to expand something. The same statements
are inserted at the beginning of \\{scan\_int} (\sec440), \\{scan\_dimen}
(\sec448), and \\{scan\_glue} (\sec461),
except that \\{scan\_int} prints `|<i|', \\{scan\_dimen}
prints `|<d|', and \\{scan\_glue} prints `|<g|'. (Get it?) We also insert
complementary code at the end of each of these procedures:
\Y\P
$\\{decr}(\\{trace\_depth})$;\6
\&{if} $\\{tracing\_stats}>2$ \1\&{then}\5
$\\{print\_char}(\.{">"})$;\par
\Y\Q\noindent
this makes it clear when each part of the scanner has done its work.
Finally, \\{scan\_keyword} is instrumented in a similar way, but with additional
information about what part of a keyword it is seeking. The code
\Y\P$\\{incr}(\\{trace\_depth})$;\6
\&{if} $\\{tracing\_stats}>2$ \1\&{then}\6
\&{begin} \37$\\{print}(\.{"\ <\'"})$;\5
$\\{print}(\|s)$;\5
$\\{print\_char}(\.{"\'"})$;\6
\&{end};\par
\Y\Q\noindent
is inserted at the beginning of \sec407, and
\Y\P\&{if} $\\{tracing\_stats}>2$ \1\&{then}\5
$\\{print\_char}(\.{"*"})$;\2\6
\4\\{exit}: \37$\\{decr}(\\{trace\_depth})$;\6
\&{if} $\\{tracing\_stats}>2$ \1\&{then}\5
$\\{print\_char}(\.{">"})$;\2\6
\&{end};\par\Y\Q\noindent
replaces the code at the end. (Here `|*|' denotes `success': the keyword was found.)
For example, here's the beginning
of what Demo\TeX\ prints out when scanning the right-hand
side of the assignment to |\hfuzz| in problem~18:
\begintt
|! the character = <d
|! the character 1 <i
|! the character 1
|! the character 0
|! the character 0
|! the letter P>
|! the letter P <'em'
|! the letter P> <'ex'
|! the letter P> <'true'
|! the letter P> <'pt'
|! the letter P
|! \ifdim =\ifdim <x <d
|! the character 1 <i
|! the character 1
|! the character 2
|! the letter p>
|! the letter p <'em'
|! the letter p> <'ex'
|! the letter p> <'true'
|! the letter p> <'pt'
|! the letter p
|! the letter t*>
|! the character =>
\endtt
(After seeing `|=|', \TeX\ calls \\{scan\_dimen}. The next character seen is
`|1|'; \\{scan\_dimen} puts it back to be read again and calls \\{scan\_int},
which finds `|100|', etc.
This output demonstrates the fact that
\TeX\ frequently uses \\{back\_input} to reread a character,
when it isn't quite ready to deal with that character.)
\bigbreak
\noindent{\bf Acknowledgement}
\nobreak\smallskip\noindent
I wish to thank the brave students of my experimental class
for motivating me to think of these questions, for sticking with me
when the questions were impossible to understand,
and for making many improvements to my original answers.
\endgroup\bye